---
title: "Instigator and Fractionalization Extension"
author: "Sima Biondi, Priyanka Sethy, Natalie Ayers"
date: "2022-12-10"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)

library(foreign)
library(mediation)
library(gsfuns)
library("sandwich")
library("lmtest")
library("stargazer")
library("lfe")
library("plm")
library(dplyr)
library(here)
```

## Data Ingestion and Preparation

```{r read in data}
an.df <- read.dta(here("epr_segment_level_analysis.dta"))

an.df$pys <- an.df$peaceyears
an.df$pys2 <- an.df$peaceyears^2
an.df$pys3 <- an.df$peaceyears^3

tfrac_group <- an.df %>%
              dplyr::select(c(year, gwgroupid, countries_gwid, ag_id, split, tfrac, tfrac_incr, tfrac_incr_post1946,
                       ln_ag_area_sqkm, status_excl, downgraded2, rbal, warhist, 
                       ln_capdist, ln_rgdppc_lag, ln_pop_lag, colonial_past, ln_state_age, ag_incidence_flag_lag,
                       pys, pys2, pys3)) %>% 
                unique()

# CONSERVATIVE JOIN OF COW - ACD
ged_cow <- read.csv(here("ged_cow_confirmed1.csv"))

ged_cow_tojoin <- ged_cow %>%
  dplyr::select(-c(min_year, max_year, country)) %>%
  unique()

ged_cow_tfrac <- merge(ged_cow_tojoin, tfrac_group, by.x = c("year", "gwid", "gwgroupid"),
                       by.y = c("year", "countries_gwid", "gwgroupid"))

# LESS CONSERVATIVE JOIN OF COW - ACD
ged_cow01 <- read.csv(here("ged_cow_confirmedmaybe01.csv"))

ged_cow01_tojoin <- ged_cow01 %>%
  dplyr::select(-c(min_year, max_year, country)) %>%
  unique()

ged_cow01_tfrac <- merge(ged_cow01_tojoin, tfrac_group, by.x = c("year", "gwid", "gwgroupid"),
                       by.y = c("year", "countries_gwid", "gwgroupid"))
```

## Conservative Correlates of War - ACD Combination

```{r conservative regression no covars}
ged_cow_tfrac_lm <- lm(initiator_state ~ tfrac, data=ged_cow_tfrac)
summary(ged_cow_tfrac_lm)
```

```{r conservative regression full, warning=FALSE}
# define variables for use in all models, including DV
vars_logit <- c("initiator_state", "ln_ag_area_sqkm", "ag_incidence_flag_lag",
                "status_excl", "downgraded2",
                "rbal", "warhist", "ln_capdist", "ln_rgdppc_lag",
                "ln_pop_lag",  "colonial_past", "ln_state_age",
                "pys", "pys2", "pys3")
# identify AG "treatment" variables to be analyzed separately
treat.vars <- c("split", "tfrac", "tfrac_incr_post1946")

# for each "treatment" variable, run logit model
for(treat_var in treat.vars){
  full_lhs <- append(treat_var, vars_logit)
  onset_logit <- glm(initiator_state ~ ., 
                     data=ged_cow_tfrac[,full_lhs], family="binomial")
  assign(paste0("logit_",treat_var), onset_logit)
}

# include clustered standard errors for each generated model
logit_split_coefs <- coeftest(logit_split, 
                              vcov. = vcovCL(logit_split, 
                                             cluster = ged_cow_tfrac$ag_id))
logit_tfrac_coefs <- coeftest(logit_tfrac, 
                              vcov. = vcovCL(logit_tfrac, 
                                             cluster = ged_cow_tfrac$ag_id))
logit_tfrac_1946_coefs <- coeftest(logit_tfrac_incr_post1946, 
                                   vcov. = vcovCL(logit_tfrac_incr_post1946, 
                                                  cluster = ged_cow_tfrac$ag_id))

se_robust <- function(x){
  coeftest(x, vcov. = vcovCL(x, cluster = ged_cow_tfrac$ag_id))[, "Std. Error"]
}


# Display results of logit model
stargazer(logit_split, logit_tfrac,
          type="latex",
          dep.var.labels.include = FALSE,
          title = "Fractionalization and State Initiation",
          omit = c("pys", "pys2","pys3","ln_state_age","ag_incidence_flag_lag"),
          dep.var.caption = "State-Initiated Conflict",
          se = lapply(list(logit_split, logit_tfrac), se_robust),
          covariate.labels = c("Divided group", "Fractionalization", 
                               "Territory sq.km, log","Exclusion",
                               "Downgraded", "Relative size", "War history",
                               "Distance to capital, log", "GDP, lag, log",
                               "Population, log","Colonial history"))
```

## Case Study of Divided with High/Low Fractionalization

```{r examine divided groups with high vs low frac}
ged_cow_tfrac_div <- ged_cow_tfrac %>%
                  filter(split == 1)

ged_cow_tfrac_div %>% filter(tfrac < 0.66219) %>% nrow()
```

## Less Conservative COW - ACD Combination

```{r conservative regression no covars}
ged_cow01_tfrac_lm <- lm(initiator_state ~ tfrac, data=ged_cow01_tfrac)
summary(ged_cow01_tfrac_lm)
```

```{r conservative regression full, warning=FALSE}
# define variables for use in all models, including DV
vars_logit <- c("initiator_state", "ln_ag_area_sqkm", "ag_incidence_flag_lag",
                "status_excl", "downgraded2",
                "rbal", "warhist", "ln_capdist", "ln_rgdppc_lag",
                "ln_pop_lag",  "colonial_past", "ln_state_age",
                "pys", "pys2", "pys3")
# identify AG "treatment" variables to be analyzed separately
treat.vars <- c("split", "tfrac", "tfrac_incr_post1946")

# for each "treatment" variable, run logit model
for(treat_var in treat.vars){
  full_lhs <- append(treat_var, vars_logit)
  onset_logit <- glm(initiator_state ~ ., 
                     data=ged_cow01_tfrac[,full_lhs], family="binomial")
  assign(paste0("logit_",treat_var), onset_logit)
}

# include clustered standard errors for each generated model
logit_split_coefs <- coeftest(logit_split, 
                              vcov. = vcovCL(logit_split, 
                                             cluster = ged_cow01_tfrac$ag_id))
logit_tfrac_coefs <- coeftest(logit_tfrac, 
                              vcov. = vcovCL(logit_tfrac, 
                                             cluster = ged_cow01_tfrac$ag_id))
logit_tfrac_1946_coefs <- coeftest(logit_tfrac_incr_post1946, 
                                   vcov. = vcovCL(logit_tfrac_incr_post1946, 
                                                  cluster = ged_cow01_tfrac$ag_id))

se_robust <- function(x){
  coeftest(x, vcov. = vcovCL(x, cluster = ged_cow01_tfrac$ag_id))[, "Std. Error"]
}


# Display results of logit model
stargazer(logit_split, logit_tfrac,
          type="text",
          dep.var.labels.include = FALSE,
          omit = c("pys", "pys2","pys3"),
          dep.var.caption = "State-Initiated Conflict",
          se = lapply(list(logit_split, logit_tfrac), se_robust),
          covariate.labels = c("Divided group", "Fractionalization", 
                               "Territory sq.km, log",
                               "Ongoing conflict, lag", "Exclusion",
                               "Downgraded", "Relative size", "War history",
                               "Distance to capital, log", "GDP, lag, log",
                               "Population, log","Colonial history",
                               "State age, log"))
```

### Resources Used

https://stackoverflow.com/questions/40136434/appending-statistics-to-coeftest-output-to-include-in-stargazer-tables
